import pandas as pd, numpy as np, scanpy as sc
import plotly
import plotly.express as px
from asctb_ct_label_mapper.utilities.nlp_preprocessing import execute_nlp_pipeline
plotly.offline.init_notebook_mode()
def fetch_and_parse_crosswalk_table(crosswalk_filename='Azimuth_CellTypist_PopV_Lung_ASCTB_Crosswalks.csv', raw_labels_column='raw_input_column', asctb_crosswalk_column='translation_column', verbose=False):
    """Load the final crosswalk CSV (with SME feedback) and normalise its label columns.

    The CSV must contain the columns `source`, `best_matched_asctb_label`, and the two
    columns named by `raw_labels_column` and `asctb_crosswalk_column`.

    Args:
        crosswalk_filename (str, optional): Path of the crosswalk CSV. Defaults to 'Azimuth_CellTypist_PopV_Lung_ASCTB_Crosswalks.csv'.
        raw_labels_column (str, optional): Column holding the raw labels. Defaults to 'raw_input_column'.
        asctb_crosswalk_column (str, optional): Column holding the SME translation; a cell equal to '?' means "no translation". Defaults to 'translation_column'.
        verbose (bool, optional): When True, print a one-line summary of the parsed table. Defaults to False.

    Returns:
        pd.DataFrame: The crosswalk with `source` shortened to azimuth/popv/celltypist, the raw-label
        column lower-cased, and a new lower-cased `asctb_equivalent` column (SME translation, falling
        back to `best_matched_asctb_label` where the translation is missing or '?').
    """
    def _normalise_labels(labels):
        # Use the substring-level `.str.replace` here: `Series.replace` would only touch cells that are
        # exactly 'φ', which disagrees with the `x.lower().replace('φ', ...)` normalisation applied to
        # the lookup keys elsewhere in this file.
        return labels.str.lower().str.replace('φ', 'ï†', regex=False)

    crosswalk_df = pd.read_csv(crosswalk_filename)

    source_aliases = {'Azimuth-HLCAv2': 'azimuth', 'PopV-Lung': 'popv', 'CellTypist-Lung': 'celltypist'}
    crosswalk_df['source'] = crosswalk_df['source'].replace(source_aliases)

    crosswalk_df[raw_labels_column] = _normalise_labels(crosswalk_df[raw_labels_column])

    # '?' marks a row without an SME translation; those rows (and empty ones) fall back to the best match.
    sme_translation = crosswalk_df[asctb_crosswalk_column].replace('?', np.nan)
    needs_fallback = sme_translation.isna()
    crosswalk_df['asctb_equivalent'] = _normalise_labels(
        sme_translation.fillna(crosswalk_df['best_matched_asctb_label'])
    )

    if verbose:
        print(f'Parsed {len(crosswalk_df)} crosswalk rows from {crosswalk_filename}; '
              f'{int(needs_fallback.sum())} fell back to best_matched_asctb_label.')
    return crosswalk_df
def get_crosswalk_translation_hmap(crosswalk_df):
    """Build the raw-label -> ASCTB-label lookup from the crosswalk table.

    Args:
        crosswalk_df (pd.DataFrame): DataFrame containing final crosswalk information. Essential columns: `[unique_cts, asctb_equivalent]`.

    Returns:
        dict: Maps each `unique_cts` value to its `asctb_equivalent`; when a key repeats, the last row wins.
    """
    raw_labels = crosswalk_df['unique_cts']
    asctb_labels = crosswalk_df['asctb_equivalent']
    return {raw_label: asctb_label for raw_label, asctb_label in zip(raw_labels, asctb_labels)}
def clean_and_translate_annotation(input_label):
    """Pass each whitespace-separated token of `input_label` through `execute_nlp_pipeline` and join the results with single spaces."""
    tokens = input_label.split()
    return ' '.join(map(execute_nlp_pipeline, tokens))
# Load the crosswalk table (with SME feedback) used to translate annotations -> ASCTB convention.
# The raw-label column is renamed to `unique_cts`, the key column that get_crosswalk_translation_hmap reads.
crosswalk_df = fetch_and_parse_crosswalk_table(
    crosswalk_filename='ASCTB_Mapper/Azimuth_CellTypist_PopV_Lung_ASCTB_Crosswalks.csv',
    raw_labels_column='raw_input_label',
    asctb_crosswalk_column='Glorias_recco_for_ASCTB_Crosswalk',
    verbose=False,
).rename(columns={'raw_input_label': 'unique_cts'})

translation_hmap = get_crosswalk_translation_hmap(crosswalk_df)
ANNDATA_FOLDER = 'Datasets'
QUERY_DATASET_NAME = 'LCA'


def _to_asctb_label(raw_label):
    """Translate one predicted label via `translation_hmap`; labels without an entry are returned unchanged."""
    # Lookup keys are lower-cased with 'φ' rewritten, matching how the crosswalk's raw labels were normalised.
    return translation_hmap.get(raw_label.lower().replace('φ','ï†'), raw_label)


query_dataset_dir = f'{ANNDATA_FOLDER}/{QUERY_DATASET_NAME}'

# "LCA.h5ad" file is pre-annotated
predictions_adata = sc.read_h5ad(f'{query_dataset_dir}/{QUERY_DATASET_NAME}.h5ad')

# --- PopV: translate the label columns, then keep only the majority-vote label and its score in obs ---
popv_lung_all_preds_df = pd.read_csv(f'{query_dataset_dir}/popv_preds.csv')
# Preserve the untranslated majority vote before it is overwritten below.
popv_lung_all_preds_df['popv_majority_vote_prediction_original'] = popv_lung_all_preds_df['popv_majority_vote_prediction']
for c in ('popv_prediction', 'popv_majority_vote_prediction'):
    popv_lung_all_preds_df[c] = popv_lung_all_preds_df[c].apply(_to_asctb_label)
popv_lung_preds_df = popv_lung_all_preds_df[['popv_majority_vote_prediction','popv_prediction_score']].rename(
    columns={'popv_majority_vote_prediction':'raw_predicted_labels'}
)
popv_lung_preds_df['predicted_labels'] = popv_lung_preds_df['raw_predicted_labels']
# Values are copied positionally (via lists), so the CSV rows must be in the same order as the cells in obs.
predictions_adata.obs['popv_preds'] = popv_lung_preds_df['predicted_labels'].tolist()
predictions_adata.obs['popv_scores'] = popv_lung_preds_df['popv_prediction_score'].tolist()

# --- CellTypist: translate the label columns, then keep only majority_voting and its confidence in obs ---
celltypist_lung_all_preds_df = pd.read_csv(f'{query_dataset_dir}/celltypist_preds.csv')
celltypist_lung_all_preds_df['majority_voting_original'] = celltypist_lung_all_preds_df['majority_voting']
for c in ('predicted_labels', 'majority_voting'):
    celltypist_lung_all_preds_df[c] = celltypist_lung_all_preds_df[c].apply(_to_asctb_label)
celltypist_lung_preds_df = celltypist_lung_all_preds_df[['majority_voting','conf_score']].rename(
    columns={'majority_voting':'raw_predicted_labels'}
)
celltypist_lung_preds_df['predicted_labels'] = celltypist_lung_preds_df['raw_predicted_labels']
predictions_adata.obs['celltypist_preds'] = celltypist_lung_preds_df['predicted_labels'].tolist()
predictions_adata.obs['celltypist_scores'] = celltypist_lung_preds_df['conf_score'].tolist()

# --- Azimuth: translate every annotation level, then keep only the finest level and its score in obs ---
# NOTE(review): the file has a .tsv extension but is parsed as space-delimited — confirm against the file.
azimuth_lung_all_preds_df = pd.read_csv(f'{query_dataset_dir}/azimuth_preds.tsv', sep=' ')
azimuth_lung_all_preds_df['predicted.ann_finest_level_original'] = azimuth_lung_all_preds_df['predicted.ann_finest_level']
for c in (
    'predicted.ann_level_1',
    'predicted.ann_level_2',
    'predicted.ann_level_3',
    'predicted.ann_level_4',
    'predicted.ann_level_5',
    'predicted.ann_finest_level',
):
    azimuth_lung_all_preds_df[c] = azimuth_lung_all_preds_df[c].apply(_to_asctb_label)
azimuth_lung_preds_df = azimuth_lung_all_preds_df[['predicted.ann_finest_level', 'predicted.ann_finest_level.score']].rename(
    columns={'predicted.ann_finest_level':'raw_predicted_labels'}
)
azimuth_lung_preds_df['predicted_labels'] = azimuth_lung_preds_df['raw_predicted_labels']
predictions_adata.obs['azimuth_preds'] = azimuth_lung_preds_df['predicted_labels'].tolist()
predictions_adata.obs['azimuth_scores'] = azimuth_lung_preds_df['predicted.ann_finest_level.score'].tolist()

predictions_adata.obs.columns
Index(['method', 'donor', 'cell_ontology_type', 'donor_method',
'cell_ontology_id', 'popv_preds', 'popv_scores', 'celltypist_preds',
'celltypist_scores', 'azimuth_preds', 'azimuth_scores'],
dtype='object')
We need to recreate the logic that the Tabula-Sapiens authors used to merge the 2 source datasets into the merged LCA dataset.
To do so, we trace the indexes in the target dataframe back to columns of the source dataframes.
Found that the index in the target LCA.obs df is the 10x cell id (the index of the 10x source obs) with '-1' appended.
Found that the index in the target LCA.obs df is the `cell.id` of the smartseq2 data with '-2' appended.
All of this investigation effort is required because we haven't yet received clarification from their team's developer, and they don't have this documented anywhere.
This entire trace-back activity is required so that we can pull the compartment column from the 2 source dataframes into our target LCA dataframe containing popv_preds, and then show a breakdown of compartments.
# Load the two source datasets; each lives at <ANNDATA_FOLDER>/<name>/<name>.h5ad.
lca_smart_seq2_name, lca_10x_name = 'LCA_Smart_seq2', 'LCA_10x'
lca_smart_seq2_adata, lca_10x_adata = (
    sc.read_h5ad(f'{ANNDATA_FOLDER}/{source_name}/{source_name}.h5ad')
    for source_name in (lca_smart_seq2_name, lca_10x_name)
)
# Preview the first two Smart-seq2 cells, transposed so each obs column is shown as a row.
lca_smart_seq2_adata.obs.head(2).T
| index | A1_B002014.gencode.vH29 | A1_B003138.gencode.vH29 |
|---|---|---|
| nGene | 1724 | 2311 |
| nReads | 542313 | 1135638 |
| plate.barcode | B002014 | B003138 |
| cell.id | A1_B002014.gencode.vH29 | A1_B003138.gencode.vH29 |
| region | normal | normal |
| label | Ecpam, CD45 | Ecpam, CD45 |
| sorter | ahmad/kyle/lolita | ahmad/kyle/lolita |
| sort.location | biohub | biohub |
| sample | medial 2 | medial 2 |
| location | medial | medial |
| donor_id | 2 | 2 |
| percent.ercc | 0.098967 | 0.078368 |
| percent.ribo | 0.011503 | 0.026572 |
| gating | nan | nan |
| free_annotation | Capillary Aerocyte | Capillary Aerocyte |
| Number of splices: Total | 487297 | 957485 |
| Number of splices: Annotated (sjdb) | 487067 | 957386 |
| Number of splices: GT-AG | 482193 | 949158 |
| Number of splices: GC-AG | 3466 | 6707 |
| Number of splices: AT-AC | 277 | 304 |
| Number of splices: Non-canonical | 1361 | 1316 |
| Mapping speed, Million of reads per hour | 49.785 | 52.45 |
| Average input read length | 238.5 | 239.0 |
| compartment | endothelial | endothelial |
| tissue_ontology_term_id | UBERON:0002048 | UBERON:0002048 |
| disease_ontology_term_id | PATO:0000461 | PATO:0000461 |
| development_stage_ontology_term_id | HsapDv:0000140 | HsapDv:0000140 |
| assay_ontology_term_id | EFO:0008931 | EFO:0008931 |
| cell_type_ontology_term_id | CL:0000115 | CL:0000115 |
| self_reported_ethnicity_ontology_term_id | unknown | unknown |
| sex_ontology_term_id | PATO:0000384 | PATO:0000384 |
| is_primary_data | True | True |
| organism_ontology_term_id | NCBITaxon:9606 | NCBITaxon:9606 |
| suspension_type | cell | cell |
| cell_type | endothelial cell | endothelial cell |
| assay | Smart-seq2 | Smart-seq2 |
| disease | normal | normal |
| organism | Homo sapiens | Homo sapiens |
| sex | male | male |
| tissue | lung | lung |
| self_reported_ethnicity | unknown | unknown |
| development_stage | 46-year-old human stage | 46-year-old human stage |
# Preview the first two cells of the 10x source obs table, transposed so each obs column is shown as a row.
lca_10x_adata.obs.head(2).T
| index | P2_1_AAACCTGAGAAACCAT | P2_1_AAATGCCAGATGAGAG |
|---|---|---|
| nGene | 1347 | 1713 |
| nUMI | 2914 | 4226 |
| channel | P2_1 | P2_1 |
| region | normal | normal |
| percent.ribo | 0.035347 | 0.061051 |
| free_annotation | Capillary Aerocyte | Capillary Aerocyte |
| donor_id | 2 | 2 |
| sample | distal 2 | distal 2 |
| location | distal | distal |
| magnetic.selection | epithelial | epithelial |
| preparation.site | biohub | biohub |
| compartment | endothelial | endothelial |
| tissue_ontology_term_id | UBERON:0002048 | UBERON:0002048 |
| assay_ontology_term_id | EFO:0009899 | EFO:0009899 |
| disease_ontology_term_id | PATO:0000461 | PATO:0000461 |
| development_stage_ontology_term_id | HsapDv:0000140 | HsapDv:0000140 |
| cell_type_ontology_term_id | CL:0000115 | CL:0000115 |
| self_reported_ethnicity_ontology_term_id | unknown | unknown |
| sex_ontology_term_id | PATO:0000384 | PATO:0000384 |
| is_primary_data | True | True |
| organism_ontology_term_id | NCBITaxon:9606 | NCBITaxon:9606 |
| suspension_type | cell | cell |
| cell_type | endothelial cell | endothelial cell |
| assay | 10x 3' v2 | 10x 3' v2 |
| disease | normal | normal |
| organism | Homo sapiens | Homo sapiens |
| sex | male | male |
| tissue | lung | lung |
| self_reported_ethnicity | unknown | unknown |
| development_stage | 46-year-old human stage | 46-year-old human stage |
The original 10x data seems to have
obs.index + '-1' as the index in the final merged LCA data
from asctb_ct_label_mapper.utilities.plotting import make_venn_diagram

# Compare the index of the 10X cells in the merged query data with the 10x source index + '-1'.
final_10x_indexes = predictions_adata.obs.loc[predictions_adata.obs['method']=='10X'].index.tolist()
original_10x_values = [x + '-1' for x in lca_10x_adata.obs.index.tolist()]

try:
    fig = make_venn_diagram(
        A=set(final_10x_indexes),
        B=set(original_10x_values),
        labels=['Final 10x Indexes in our query-data', 'Original 10x Indexes in our source data'],
        title='Backtracking the indexes for the 10x Data: Everything should be just one set'
    )
except Exception as err:
    # The diagram is only a visual sanity check, so keep going — but report the failure
    # instead of silently discarding it (a bare `except: pass` also swallowed KeyboardInterrupt).
    print(f'Could not draw the 10x index venn diagram: {err!r}')
The original smartseq2 data seems to have
obs['cell.id'] + '-2' as the index in the final merged LCA data
from asctb_ct_label_mapper.utilities.plotting import make_venn_diagram

# Compare the index of the smartseq2 cells in the merged query data with the source `cell.id` + '-2'.
final_smartseq2_indexes = predictions_adata.obs.loc[predictions_adata.obs['method']=='smartseq2'].index.tolist()
original_smartseq2_values = [x + '-2' for x in lca_smart_seq2_adata.obs['cell.id'].values.tolist()]

try:
    fig = make_venn_diagram(
        A=set(final_smartseq2_indexes),
        B=set(original_smartseq2_values),
        labels=['Final Smartseq2 Indexes in our query-data', 'Original Smartseq2 Indexes in our source data'],
        title='Backtracking the indexes for the Smartseq2 Data: Everything should be just one set'
    )
except Exception as err:
    # The diagram is only a visual sanity check, so keep going — but report the failure
    # instead of silently discarding it (a bare `except: pass` also swallowed KeyboardInterrupt).
    print(f'Could not draw the Smartseq2 index venn diagram: {err!r}')
# Section: annotations to compartments in the raw-datasets
# (this heading text was fused onto the first statement of the cell; it is kept here as a comment)

# Give every source cell the index it carries in the merged LCA data, plus a short assay tag.
lca_10x_adata.obs['10x_index'] = [x + '-1' for x in lca_10x_adata.obs.index]
lca_10x_adata.obs['assay'] = '10X'
lca_smart_seq2_adata.obs['smart_seq2_index'] = [x + '-2' for x in lca_smart_seq2_adata.obs['cell.id'].tolist()]
lca_smart_seq2_adata.obs['assay'] = 'smartseq2'

# Stack both sources into one table with a common `index` column. Each part's row labels are reset
# before stacking and the concat keeps them, so row labels restart at 0 for the smartseq2 part.
compartments_df = pd.concat(
    [
        lca_10x_adata.obs[['assay','cell_type','10x_index','compartment']]
        .reset_index(drop=True)
        .rename(columns={'10x_index':'index'}),
        lca_smart_seq2_adata.obs[['assay','cell_type','smart_seq2_index','compartment']]
        .reset_index(drop=True)
        .rename(columns={'smart_seq2_index':'index'}),
    ],
    axis=0
)
compartments_df
| assay | cell_type | index | compartment | |
|---|---|---|---|---|
| 0 | 10X | endothelial cell | P2_1_AAACCTGAGAAACCAT-1 | endothelial |
| 1 | 10X | endothelial cell | P2_1_AAATGCCAGATGAGAG-1 | endothelial |
| 2 | 10X | endothelial cell | P2_1_AACACGTTCGATCCCT-1 | endothelial |
| 3 | 10X | endothelial cell | P2_1_AACACGTTCGCACTCT-1 | endothelial |
| 4 | 10X | endothelial cell | P2_1_AACCATGCAGCTCGCA-1 | endothelial |
| ... | ... | ... | ... | ... |
| 9404 | smartseq2 | lung ciliated cell | M5_B001771.gencode.vH29-2 | epithelial |
| 9405 | smartseq2 | lung ciliated cell | N2_B001769.gencode.vH29-2 | epithelial |
| 9406 | smartseq2 | lung ciliated cell | N2_B002460.gencode.vH29-2 | epithelial |
| 9407 | smartseq2 | lung ciliated cell | O2_B001774.gencode.vH29-2 | epithelial |
| 9408 | smartseq2 | lung ciliated cell | O7_B001774.gencode.vH29-2 | epithelial |
75071 rows × 4 columns
def _asctb_equivalent_of(cell_type):
    """Translate one source cell_type via `translation_hmap`; unmatched values are returned unchanged."""
    return translation_hmap.get(cell_type.lower().replace('φ','ï†'), cell_type)


# One row per distinct (cell_type, compartment) pair seen in the source datasets, ordered by cell_type.
mapping_compartments_df = (
    compartments_df.loc[:, ['cell_type','compartment']]
    .drop_duplicates()
    .sort_values(by=['cell_type'])
)
mapping_compartments_df['cell_type_asctb_equivalent'] = mapping_compartments_df['cell_type'].apply(_asctb_equivalent_of)
mapping_compartments_df
| cell_type | compartment | cell_type_asctb_equivalent | |
|---|---|---|---|
| 9971 | B cell | immune | b cell |
| 9944 | CD1c-positive myeloid dendritic cell | immune | CD1c-positive myeloid dendritic cell |
| 10116 | alveolar macrophage | immune | alveolar macrophage |
| 32352 | basal cell | epithelial | basal cell |
| 26973 | bronchial smooth muscle cell | stromal | bronchial smooth muscle cell |
| 3237 | capillary endothelial cell | endothelial | cap1 general capillary gcap |
| 64536 | ciliated cell | epithelial | ciliated cell |
| 26224 | classical monocyte | immune | classical monocyte |
| 20465 | club cell | epithelial | club cell |
| 9714 | dendritic cell | immune | migratory dendritic cell |
| 7175 | dendritic cell, human | immune | dendritic cell, human |
| 16969 | effector memory CD4-positive, alpha-beta T cell | immune | effector memory CD4-positive, alpha-beta T cell |
| 22009 | effector memory CD8-positive, alpha-beta T cell | immune | effector memory CD8-positive, alpha-beta T cell |
| 0 | endothelial cell | endothelial | endothelial cell |
| 18276 | endothelial cell of artery | endothelial | endothelial cell of artery |
| 21648 | endothelial cell of lymphatic vessel | endothelial | lymphatic endothelial cell |
| 57177 | epithelial cell | epithelial | epithelial cell |
| 27978 | fibroblast | stromal | fibroblast of lung |
| 26516 | intermediate monocyte | immune | classical monocyte |
| 27231 | lung ciliated cell | epithelial | lung ciliated cell |
| 64624 | lung goblet cell | epithelial | lung goblet cell |
| 61811 | lung neuroendocrine cell | epithelial | lung neuroendocrine cell |
| 22423 | lymphocyte | immune | lymphocyte |
| 21902 | macrophage | immune | macrophage |
| 22482 | mature NK T cell | immune | mature nk t cell |
| 32016 | megakaryocyte | immune | megakaryocyte |
| 38508 | mesothelial cell of pleura | stromal | mesothelial cell of pleura |
| 25252 | monocyte | immune | monocyte |
| 20574 | mucus secreting cell | epithelial | mucus secreting cell |
| 9826 | myeloid dendritic cell, human | immune | myeloid dendritic cell, human |
| 25459 | myeloid leukocyte | immune | myeloid leukocyte |
| 28380 | myofibroblast cell | stromal | myofibroblast cell |
| 16891 | naive thymus-derived CD4-positive, alpha-beta ... | immune | naive thymus-derived CD4-positive, alpha-beta ... |
| 16378 | naive thymus-derived CD8-positive, alpha-beta ... | immune | naive thymus-derived CD8-positive, alpha-beta ... |
| 22574 | natural killer cell | immune | natural killer cell |
| 3040 | neutrophil | immune | neutrophil |
| 26704 | non-classical monocyte | immune | non-classical monocyte |
| 19244 | pericyte | stromal | lung pericyte |
| 32447 | plasma cell | immune | plasma cell |
| 9891 | plasmacytoid dendritic cell, human | immune | plasmacytoid dendritic cell, human |
| 38473 | pulmonary interstitial fibroblast | stromal | pulmonary interstitial fibroblast |
| 64002 | pulmonary ionocyte | epithelial | pulmonary ionocyte |
| 21467 | respiratory basal cell | epithelial | respiratory basal cell |
| 64024 | tracheobronchial serous cell | epithelial | tracheobronchial serous cell |
| 28503 | type I pneumocyte | epithelial | type i pneumocyte |
| 20932 | type II pneumocyte | epithelial | type ii pneumocyte |
| 20379 | vascular associated smooth muscle cell | stromal | vascular smooth muscle cell |
| 17474 | vein endothelial cell | endothelial | pulmonary venous endothelial cell |
# Attach a compartment to every PopV prediction and count cells per (label, compartment).
#
# Several source cell types can translate to the same ASCTB label, so `mapping_compartments_df` may hold
# the same `cell_type_asctb_equivalent` more than once. Merging against it directly emits one output row
# per match and inflates the cell counts, so first collapse the lookup to unique (label, compartment) pairs.
label_to_compartment_df = mapping_compartments_df[['cell_type_asctb_equivalent', 'compartment']].drop_duplicates()
popv_compartments_df = pd.merge(
    predictions_adata.obs['popv_preds'],
    label_to_compartment_df,
    how='left',
    left_on='popv_preds',
    right_on='cell_type_asctb_equivalent',
    # Fail loudly if one label still maps to several compartments, rather than duplicating cells.
    validate='many_to_one',
)
# Round-trip through a list; presumably this turns a categorical column into plain objects so that
# new values can be assigned below — TODO confirm the dtype of `compartment`.
popv_compartments_df['compartment'] = popv_compartments_df['compartment'].tolist()

# Received Ellen's comments on 16th April.
# Manually imputing the compartment for these PopV-labels since they weren't present in the source datasets.
sme_feedback_for_compartments = {
    'cd8+ t cell naive':'immune',
    'cd4+ t cell naive':'immune',
    'bronchial goblet cell':'epithelial',
    'suprabasal cell':'epithelial',
    'plasmacytoid dendritic cell':'immune',
    'mesothelial cell':'stromal',
    'serous secreting cell of bronchus submucosal gland':'epithelial',
    'arterial endothelial cell':'endothelial',
    'smooth muscle cell':'stromal',
    'cd4+ t cell effector memory':'immune'
}
needs_sme_compartment = popv_compartments_df['compartment'].isna()
unmapped_labels = sorted(
    set(popv_compartments_df.loc[needs_sme_compartment, 'popv_preds']) - set(sme_feedback_for_compartments),
    key=str,
)
if unmapped_labels:
    # Same exception type as the plain dict lookup would raise, but naming every offending label at once.
    raise KeyError(f'No compartment known for these PopV labels: {unmapped_labels}')
popv_compartments_df.loc[needs_sme_compartment, 'compartment'] = (
    popv_compartments_df.loc[needs_sme_compartment, 'popv_preds'].map(sme_feedback_for_compartments)
)

# Number of cells per (label, compartment) pair, as a flat table with a `count` column.
popv_compartments_df = (
    popv_compartments_df.groupby(by=['popv_preds','compartment'])['popv_preds']
    .count()
    .rename('count')
    .reset_index()
)
popv_compartments_df.sort_values(by=['compartment', 'count'], ascending=[False, False])
| popv_preds | compartment | count | |
|---|---|---|---|
| 12 | fibroblast of lung | stromal | 2428 |
| 14 | lung pericyte | stromal | 2125 |
| 4 | bronchial smooth muscle cell | stromal | 1107 |
| 31 | vascular smooth muscle cell | stromal | 571 |
| 18 | mesothelial cell | stromal | 30 |
| 27 | smooth muscle cell | stromal | 5 |
| 16 | macrophage | immune | 15245 |
| 17 | mature nk t cell | immune | 5289 |
| 8 | cd8+ t cell naive | immune | 5172 |
| 9 | classical monocyte | immune | 5100 |
| 7 | cd4+ t cell naive | immune | 3911 |
| 20 | neutrophil | immune | 2107 |
| 21 | non-classical monocyte | immune | 988 |
| 1 | b cell | immune | 845 |
| 19 | migratory dendritic cell | immune | 630 |
| 22 | plasma cell | immune | 203 |
| 23 | plasmacytoid dendritic cell | immune | 139 |
| 6 | cd4+ t cell effector memory | immune | 4 |
| 30 | type ii pneumocyte | epithelial | 5125 |
| 13 | lung ciliated cell | epithelial | 1977 |
| 10 | club cell | epithelial | 1972 |
| 29 | type i pneumocyte | epithelial | 1521 |
| 2 | basal cell | epithelial | 1151 |
| 3 | bronchial goblet cell | epithelial | 979 |
| 28 | suprabasal cell | epithelial | 417 |
| 24 | pulmonary ionocyte | epithelial | 81 |
| 26 | serous secreting cell of bronchus submucosal g... | epithelial | 19 |
| 5 | cap1 general capillary gcap | endothelial | 15160 |
| 11 | endothelial cell of artery | endothelial | 1566 |
| 25 | pulmonary venous endothelial cell | endothelial | 1240 |
| 15 | lymphatic endothelial cell | endothelial | 503 |
| 0 | arterial endothelial cell | endothelial | 11 |
import plotly.graph_objects as go

# Dropdown entries (label shown to the user, compartment value); not used by this first chart.
dropdown_options = [
    {'label': label, 'value': value}
    for label, value in (
        ('All compartments', 'all'),
        ('Epithelial', 'epithelial'),
        ('Endothelial', 'endothelial'),
        ('Stromal', 'stromal'),
        ('Immune', 'immune'),
    )
]

total_count = popv_compartments_df['count'].sum()

# Single donut chart: one slice per compartment, sized by the summed cell counts.
fig = go.Figure()
fig.add_trace(
    go.Pie(
        values=popv_compartments_df['count'],
        labels=popv_compartments_df['compartment'],
        name='All compartments',
        hole=0.4,
    )
)
fig.update_layout(title='Breakdown of all compartments for PopV predicted labels')
fig.show()
import plotly.graph_objects as go

dropdown_options = [
    dict(label='Endothelial', value='endothelial'),
    dict(label='Epithelial', value='epithelial'),
    dict(label='Immune', value='immune'),
    dict(label='Stromal', value='stromal'),
]

fig = go.Figure()

# One hidden donut trace per compartment; the dropdown decides which one is shown.
for compartment in sorted(popv_compartments_df['compartment'].unique()):
    print(compartment)
    in_compartment = popv_compartments_df['compartment'] == compartment
    fig.add_trace(
        go.Pie(
            values=popv_compartments_df.loc[in_compartment, 'count'],
            labels=popv_compartments_df.loc[in_compartment, 'popv_preds'],
            name=f'Compartment {compartment}',
            hole=0.4,
            visible=False
        )
    )

overview_title = f'Breakdown of all compartments for PopV predicted labels in the {QUERY_DATASET_NAME} dataset'

# Build one dropdown button per option: it shows the traces whose name contains the option value
# and swaps the figure title accordingly.
dropdown_buttons = []
for dropdown_option in dropdown_options:
    if dropdown_option['value']=='all':
        button_title = overview_title
    else:
        n_unique_labels = popv_compartments_df.loc[popv_compartments_df["compartment"] == dropdown_option["value"], "popv_preds"].nunique()
        button_title = (
            dropdown_option['label']
            + f' Compartment - {n_unique_labels}'
            + f' unique Cell-Type annotations for PopV predicted labels in the {QUERY_DATASET_NAME} dataset'
        )
    dropdown_buttons.append(
        dict(
            label=dropdown_option['label'],
            method='update',
            args=[
                {'visible': [dropdown_option['value'] in trace.name for trace in fig.data]},
                {'title': button_title},
            ]
        )
    )

# Update the layout with the dropdown
fig.update_layout(
    title=overview_title,
    updatemenus=[dict(buttons=dropdown_buttons)]
)
fig.show()
endothelial epithelial immune stromal
import plotly.graph_objects as go


def _trace_name_for(option_value):
    """Return the name of the pie trace that the dropdown entry `option_value` reveals."""
    return 'all compartments' if option_value == 'all' else f'Compartment {option_value}'


def _title_for(dropdown_option):
    """Return the figure title shown while `dropdown_option` is selected."""
    if dropdown_option['value'] == 'all':
        return f'Breakdown of all compartments for PopV predicted labels in the {QUERY_DATASET_NAME} dataset'
    n_unique_labels = popv_compartments_df.loc[
        popv_compartments_df['compartment'] == dropdown_option['value'], 'popv_preds'
    ].nunique()
    return (
        dropdown_option['label']
        + f' Compartment - {n_unique_labels}'
        + f' unique Cell-Type annotations for PopV predicted labels in the {QUERY_DATASET_NAME} dataset'
    )


def _button_for(dropdown_option, traces):
    """Return the dropdown button that shows only the trace belonging to `dropdown_option`."""
    wanted_trace_name = _trace_name_for(dropdown_option['value'])
    return dict(
        label=dropdown_option['label'],
        method='update',
        args=[
            # Exact name comparison: a substring test would also reveal any trace whose name merely
            # contains the option value (e.g. a compartment name that contains 'all').
            {'visible': [trace.name == wanted_trace_name for trace in traces]},
            {'title': _title_for(dropdown_option)},
        ]
    )


dropdown_options = [
    {'label': 'Overview of compartments', 'value': 'all'},
    {'label': 'Endothelial', 'value': 'endothelial'},
    {'label': 'Epithelial', 'value': 'epithelial'},
    {'label': 'Immune', 'value': 'immune'},
    {'label': 'Stromal', 'value': 'stromal'},
]

fig = go.Figure()

# First trace: the overview across compartments; it is the only trace visible on first render.
print('all')
fig.add_trace(
    go.Pie(
        values=popv_compartments_df['count'],
        labels=popv_compartments_df['compartment'],
        name=_trace_name_for('all'),
        hole=0.4,
        visible=True,
    )
)

# One hidden trace per compartment, broken down by PopV predicted label.
for compartment in sorted(popv_compartments_df['compartment'].unique()):
    print(compartment)
    in_compartment = popv_compartments_df['compartment'] == compartment
    fig.add_trace(
        go.Pie(
            values=popv_compartments_df.loc[in_compartment, 'count'],
            labels=popv_compartments_df.loc[in_compartment, 'popv_preds'],
            name=_trace_name_for(compartment),
            hole=0.4,
            visible=False
        )
    )

# Update the layout with the dropdown (buttons are built after all traces exist).
fig.update_layout(
    title=f'Breakdown of all compartments for PopV predicted labels in the {QUERY_DATASET_NAME} dataset',
    updatemenus=[
        dict(buttons=[_button_for(dropdown_option, fig.data) for dropdown_option in dropdown_options])
    ]
)
fig.show()
all endothelial epithelial immune stromal